*---- Session setup ----*
* Start from an empty session and make sure no earlier log is still open.
clear all
capture log close
set more off
set matsize 11000

*---- Set Directory ----*
* Project root, plus the sub-folders (relative to the root) used throughout this file.
global rootDir "/Users/AliceZhang/Dropbox/Research_Columbia/Renewables Voting (Urpelainen Zhang)/JOP/UZ_JOP2021_Replication"
global dataDir "Data"
global resultDir "Results"
global logDir "Analysis/logSTATA"
global graphDir "Results/Figures"

*---- Log File Path ----*
* Open this script's log inside the log folder, overwriting any previous copy.
cd "$rootDir/$logDir"
log using "004_analysis_environment", replace


*******************************************************************************
/*				          URPELAINEN & ZHANG 2021	        				 */
*******************************************************************************

/* 

File Name:    	004_analysis_environment.do

By:				Alice Tianbo Zhang (alice.tianbo.zhang@gmail.com)

Last Edited:  	10/11/2021

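Purpose:		IV regressions of environmental voting outcomes on cumulative
				wind turbine capacity and count. Produces Table 6 (plus the
				first-stage table, Table2.tex), Figure 4, and Tables A15,
				A17 and A20.

Data Used:      votes_wind_panel.dta
				ACS_panel_balanceTest_recodeVar.dta

Program Used:   - reghdfe -
				- estout (esttab) -
				- coefplot -
				- grc1leg -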

*/


*******************************************************************************
/*               					TABLE 6								     */
*******************************************************************************

** Load the wind votes panel
cd "$rootDir/$dataDir/Final"
use "votes_wind_panel.dta", clear

** Instrument: time index (years since 2003) interacted with mean wind potential
generate t = year - 2003
generate inter = t * mean_wp

** Fixed-effect identifiers: state-by-year cells and districts within state
egen stateyear_fixed = group(state year)
egen district_fixed = group(state district)

** Log-transformed turbine measures (1 is added before taking the log)
generate cum_lncapacity_turbine = ln(cum_capacity_turbine + 1)
generate cum_lncount_turbine = ln(cum_count_turbine + 1)


*------------ Main results: By pro- or anti- environmental voting ------------*
/* Specification inputs (local macros):

  1. outcome 		- dependent variables, one set of models per entry
  2. endogenous 	- independent variables (endogenous) to be instrumented, one per model
  3. instrument 	- excluded instrument
  4. admin1_trend 	- admin1-specific time trend (i.e. state by year fixed effect)
  5. admin2 		- panel unit and cluster variable (i.e. congressional district fixed effect)

*/
local outcome pro_env anti_env
local endogenous cum_capacity_turbine cum_count_turbine cum_lncapacity_turbine cum_lncount_turbine
local instrument inter
local admin1_trend stateyear_fixed
local admin2 district_fixed

foreach y of local outcome {
	// Outcome tag used in stored-estimate names: first "_"-separated piece (pro_env -> pro)
	local pieces : subinstr local y "_" " ", all
	local y_name : word 1 of `pieces'
	display "`y_name'"

	foreach x of local endogenous {
		// Regressor tag: second "_"-separated piece (cum_capacity_turbine -> capacity)
		local pieces : subinstr local x "_" " ", all
		local x_name : word 2 of `pieces'
		display "`x_name'"

		// IV regression with both sets of fixed effects absorbed, clustered by panel unit
		reghdfe `y' (`x' = `instrument'), ///
			absorb(`admin1_trend' `admin2') ///
			ffirst stages(first ols reduced) ///
			vce(cluster `admin2') old

		// The active results are stored as the IV fit. The remaining stages are
		// restored from reghdfe_first1, reghdfe_ols and reghdfe_reduced, then
		// re-stored under model-specific names.
		estimates store `y_name'_`x_name'_iv
		foreach stage in first ols reduced {
			local saved reghdfe_`stage'
			if "`stage'" == "first" local saved reghdfe_first1
			estimates restore `saved'
			estimates store `y_name'_`x_name'_`stage'
		}
	}
}

*--------------------- Export LaTeX regression tables -----------------------*
cd "$rootDir/$resultDir/Tables"

** First-stage regressions (same across all outcome variables, so the pro_env fits are reported)
esttab pro_capacity_first pro_count_first pro_lncapacity_first pro_lncount_first ///
	using Table2.tex, booktabs replace ///
	b(%9.3f) ///
	stats(N N_clust r2 F, labels("Observations" "Districts" "\(R^{2}\)" "\(F\)-statistic") fmt(0 0 2 2)) ///
	eqlabels(none) noconstant se nonotes unstack legend ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	varlabels(inter "Mean wind potential * time") ///
	varwidth(27) modelwidth(13) ///
	mtitles("Capacity" "Count" "log(Capacity)" "log(Count)") ///
	width(\hsize)


** Compare OLS and IV estimates: Table6.tex is written in two passes
* Panel A (OLS) creates the file and carries the column headers
esttab pro_capacity_ols pro_count_ols anti_capacity_ols anti_count_ols ///
	using Table6.tex, booktabs replace ///
	refcat(cum_capacity_turbine "\emph{Panel A: OLS}", nolabel) ///
	b(%9.3f) se noconstant noobs nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	varlabels(cum_capacity_turbine "Cumulative capacity (MW)" cum_count_turbine "Cumulative count") ///
	varwidth(27) modelwidth(13) ///
	mtitles("Model" "Model" "Model" "Model") ///
	mgroups("Pro-Environment Vote" "Anti-Environment Vote", pattern(1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	width(\hsize)

* Panel B (IV) is appended without titles or model numbers and adds the summary statistics
esttab pro_capacity_iv pro_count_iv anti_capacity_iv anti_count_iv ///
	using Table6.tex, booktabs append ///
	nomtitles se noconstant nonotes legend nonumbers collabels(none) ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	b(%9.3f) ///
	stats(N N_clust r2, labels("Observations" "Districts" "\(R^{2}\)") fmt(0 0 2)) ///
	varlabels(cum_capacity_turbine "Cumulative capacity (MW)" cum_count_turbine "Cumulative count") ///
	varwidth(27) modelwidth(13) ///
	refcat(cum_capacity_turbine "\emph{Panel B: IV}", nolabel) ///
	width(\hsize)
		

*******************************************************************************
/*          						FIGURE 4								 */
*******************************************************************************
* Issue-specific pro-environment vote outcomes, one set of models per issue
local outcome pro_env_drilling pro_env_lands pro_env_other pro_env_toxics pro_env_water pro_env_wildlife pro_env_dirty_energy pro_env_air pro_env_climate_change pro_env_clean_energy pro_env_oceans
* Endogenous regressors, instrument, state-by-year trend and panel/cluster unit
local endogenous cum_capacity_turbine cum_count_turbine cum_lncapacity_turbine cum_lncount_turbine
local instrument inter
local admin1_trend stateyear_fixed
local admin2 district_fixed


foreach y of local outcome {
	// Issue tag used in stored-estimate names: third "_"-separated piece
	// (pro_env_drilling -> drilling, pro_env_dirty_energy -> dirty)
	local pieces : subinstr local y "_" " ", all
	local y_name : word 3 of `pieces'
	display "`y_name'"

	foreach x of local endogenous {
		// Regressor tag: second "_"-separated piece (cum_capacity_turbine -> capacity)
		local pieces : subinstr local x "_" " ", all
		local x_name : word 2 of `pieces'
		display "`x_name'"

		// IV regression with both sets of fixed effects absorbed, clustered by panel unit
		reghdfe `y' (`x' = `instrument'), ///
			absorb(`admin1_trend' `admin2') ///
			ffirst stages(first ols reduced) ///
			vce(cluster `admin2') old

		// Store the active IV fit, then restore each saved stage from
		// reghdfe_first1, reghdfe_ols and reghdfe_reduced and re-store it
		// under a model-specific name.
		estimates store `y_name'_`x_name'_iv
		foreach stage in first ols reduced {
			local saved reghdfe_`stage'
			if "`stage'" == "first" local saved reghdfe_first1
			estimates restore `saved'
			estimates store `y_name'_`x_name'_`stage'
		}
		display "`y_name'_`x_name'"
	}
}

** Coefplots of issues by independent variable
** Each panel plots the IV coefficient for eight issue areas and is kept in memory by name.

* Panel 1: capacity (the only panel with an explicit legend specification)
coefplot ///
	(drilling_capacity_iv, label(drilling)) ///
	(lands_capacity_iv, label(land)) ///
	(toxics_capacity_iv, label(toxics)) ///
	(water_capacity_iv, label(water)) ///
	(wildlife_capacity_iv, label(wildlife)) ///
	(clean_capacity_iv, label(clean energy)) ///
	(dirty_capacity_iv, label(dirty energy)) ///
	(climate_capacity_iv, label(climate change)) ///
	, drop(_cons) coeflabel(cum_capacity_turbine = " ", notick) ///
	byopts(xrescale) ciopts(recast(rcap)) xline(0) graphregion(color(white)) nolabels ///
	name(issue_capacity, replace) ///
	xscale(range(-0.05 0.15)) xlabel(-0.05(0.05)0.15) ///
	legend(cols(1) pos(3) rowgap(3) symplacement(left) size(small)) ///
	title("Capacity", color(black) size(small) box bexpand bcolor(gs15) lcolor(black))

* Panel 2: count
coefplot ///
	(drilling_count_iv, label(drilling)) ///
	(lands_count_iv, label(land)) ///
	(toxics_count_iv, label(toxics)) ///
	(water_count_iv, label(water)) ///
	(wildlife_count_iv, label(wildlife)) ///
	(clean_count_iv, label(clean energy)) ///
	(dirty_count_iv, label(dirty energy)) ///
	(climate_count_iv, label(climate change)) ///
	, drop(_cons) coeflabel(cum_count_turbine = " ", notick) ///
	byopts(xrescale) ciopts(recast(rcap)) xline(0) graphregion(color(white)) nolabels ///
	name(issue_count, replace) ///
	xscale(range(-0.05 0.15)) xlabel(-0.05(0.05)0.15) ///
	title("Count", color(black) size(small) box bexpand bcolor(gs15) lcolor(black))

* Panel 3: log(capacity)
coefplot ///
	(drilling_lncapacity_iv, label(drilling)) ///
	(lands_lncapacity_iv, label(land)) ///
	(toxics_lncapacity_iv, label(toxics)) ///
	(water_lncapacity_iv, label(water)) ///
	(wildlife_lncapacity_iv, label(wildlife)) ///
	(clean_lncapacity_iv, label(clean energy)) ///
	(dirty_lncapacity_iv, label(dirty energy)) ///
	(climate_lncapacity_iv, label(climate change)) ///
	, drop(_cons) coeflabel(cum_lncapacity_turbine = " ", notick) ///
	byopts(xrescale) ciopts(recast(rcap)) xline(0) graphregion(color(white)) ///
	name(issue_lncapacity, replace) ///
	xscale(range(-15 20)) xlabel(-15(10)25) ///
	title("log(Capacity)", color(black) size(small) box bexpand bcolor(gs15) lcolor(black))

* Panel 4: log(count)
coefplot ///
	(drilling_lncount_iv, label(drilling)) ///
	(lands_lncount_iv, label(land)) ///
	(toxics_lncount_iv, label(toxics)) ///
	(water_lncount_iv, label(water)) ///
	(wildlife_lncount_iv, label(wildlife)) ///
	(clean_lncount_iv, label(clean energy)) ///
	(dirty_lncount_iv, label(dirty energy)) ///
	(climate_lncount_iv, label(climate change)) ///
	, drop(_cons) coeflabel(cum_lncount_turbine = " ", notick) ///
	byopts(xrescale) ciopts(recast(rcap)) xline(0) graphregion(color(white)) ///
	name(issue_lncount, replace) ///
	xscale(range(-15 20)) xlabel(-15(10)25) ///
	title("log(Count)", color(black) size(small) box bexpand bcolor(gs15) lcolor(black))

** Combine the four panels, taking the legend from the capacity panel, and save
grc1leg issue_capacity issue_count issue_lncapacity issue_lncount, ///
	legendfrom(issue_capacity) position(3) graphregion(color(white)) span

cd "$rootDir/$graphDir"
graph export fg4.pdf, replace


*******************************************************************************
/*       						Table A20									 */
*******************************************************************************
** Reload the wind votes panel from disk
cd "$rootDir/$dataDir/Final"
use "votes_wind_panel.dta", clear

** Instrument: time index (years since 2003) interacted with mean wind potential
generate t = year - 2003
generate inter = t * mean_wp

** Fixed-effect identifiers: state-by-year cells and districts within state
egen stateyear_fixed = group(state year)
egen district_fixed = group(state district)

** Log-transformed turbine measures (1 is added before taking the log)
generate cum_lncapacity_turbine = ln(cum_capacity_turbine + 1)
generate cum_lncount_turbine = ln(cum_count_turbine + 1)

** Numeric legislator identifier built from the string variable name
encode name, generate(legislator)

*------ Pro- and anti-environmental voting with legislator fixed effects ------*
/* Specification inputs (local macros):

  1. outcome 		- dependent variables, one set of models per entry
  2. endogenous 	- independent variables (endogenous) to be instrumented, one per model
  3. instrument 	- excluded instrument
  4. admin1_trend 	- admin1-specific time trend (i.e. state by year fixed effect)
  5. admin2 		- panel unit and cluster variable (i.e. congressional district fixed effect)
  6. individual 	- elected official (legislator) fixed effect

*/
local outcome pro_env anti_env
local endogenous cum_capacity_turbine cum_count_turbine cum_lncapacity_turbine cum_lncount_turbine
local instrument inter
local admin1_trend stateyear_fixed
local admin2 district_fixed
local individual legislator

foreach y of local outcome {
	// Outcome tag used in stored-estimate names: first "_"-separated piece (pro_env -> pro)
	local pieces : subinstr local y "_" " ", all
	local y_name : word 1 of `pieces'
	display "`y_name'"

	foreach x of local endogenous {
		// Regressor tag: second "_"-separated piece (cum_capacity_turbine -> capacity)
		local pieces : subinstr local x "_" " ", all
		local x_name : word 2 of `pieces'
		display "`x_name'"

		// IV regression absorbing state-year, district and legislator effects
		reghdfe `y' (`x' = `instrument'), ///
			absorb(`admin1_trend' `admin2' `individual') ///
			ffirst stages(first ols reduced) ///
			vce(cluster `admin2') old

		// Store the active IV fit, then restore each saved stage from
		// reghdfe_first1, reghdfe_ols and reghdfe_reduced and re-store it
		// under a model-specific name.
		estimates store `y_name'_`x_name'_iv
		foreach stage in first ols reduced {
			local saved reghdfe_`stage'
			if "`stage'" == "first" local saved reghdfe_first1
			estimates restore `saved'
			estimates store `y_name'_`x_name'_`stage'
		}
	}
}

*--------------------- Export LaTeX regression tables -----------------------*
cd "$rootDir/$resultDir/Tables"

** TableA20.tex is written in two passes, using the log-specification models only
* Panel A (OLS) creates the file and carries the column headers
esttab pro_lncapacity_ols pro_lncount_ols anti_lncapacity_ols anti_lncount_ols ///
	using TableA20.tex, booktabs replace ///
	refcat(cum_lncapacity_turbine "\emph{Panel A: OLS}", nolabel) ///
	b(%9.3f) se noconstant noobs nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	varlabels(cum_lncapacity_turbine "log(Cumulative capacity+1)" cum_lncount_turbine "log(Cumulative count+1)") ///
	varwidth(27) modelwidth(13) ///
	mtitles("Model" "Model" "Model" "Model") ///
	mgroups("Pro-Environment Vote" "Anti-Environment Vote", pattern(1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	width(\hsize)

* Panel B (IV) is appended without titles or model numbers and adds the summary statistics
esttab pro_lncapacity_iv pro_lncount_iv anti_lncapacity_iv anti_lncount_iv ///
	using TableA20.tex, booktabs append ///
	refcat(cum_lncapacity_turbine "\emph{Panel B: IV}", nolabel) ///
	nomtitles se noconstant nonotes legend nonumbers collabels(none) ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	b(%9.3f) ///
	stats(N N_clust r2, labels("Observations" "Districts" "\(R^{2}\)") fmt(0 0 2)) ///
	varlabels(cum_lncapacity_turbine "log(Cumulative capacity+1)" cum_lncount_turbine "log(Cumulative count+1)") ///
	varwidth(27) modelwidth(13) ///
	width(\hsize)


*******************************************************************************
/*       						 TABLE A15									 */
*******************************************************************************
** Load the ACS balance-test panel
cd "$rootDir/$dataDir/Final"
use "ACS_panel_balanceTest_recodeVar.dta", clear

** Instrument: time index (years since 2005 in this panel) interacted with mean wind potential
generate t = year - 2005
generate inter = t * mean_wp

** Fixed-effect identifiers: state-by-year cells and districts within state
egen stateyear_fixed = group(state year)
egen district_fixed = group(state district)

** Log-transformed turbine measures (1 is added before taking the log)
generate cum_lncapacity_turbine = ln(cum_capacity_turbine + 1)
generate cum_lncount_turbine = ln(cum_count_turbine + 1)

*gen pop_density = pop/dist_area

** Include balance variables as controls
** Run order and stored names: pro_capacityB_iv, pro_countB_iv, anti_capacityB_iv, anti_countB_iv
foreach vote in pro anti {
	foreach measure in capacity count {
		reghdfe `vote'_env pop white home_median (cum_`measure'_turbine=inter), ///
			absorb(stateyear_fixed district_fixed) ///
			vce(cluster district_fixed) old
		estimates store `vote'_`measure'B_iv
	}
}


*--------------------- Export LaTeX regression tables -----------------------*
cd "$rootDir/$resultDir/Tables"

** IV estimates only, with the balance covariates reported alongside the turbine measures
* NOTE(review): home_median is labelled "Median gross rent" below - confirm the label matches the variable.
esttab pro_capacityB_iv pro_countB_iv anti_capacityB_iv anti_countB_iv ///
	using TableA15.tex, booktabs replace ///
	refcat(cum_capacity_turbine "\emph{Panel: IV}", nolabel) ///
	b(%9.3f) se noconstant nonotes legend ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	varlabels(cum_capacity_turbine "Cumulative capacity (MW)" pop "Total population (thousand)" white "White (\%)" home_median "Median gross rent (\textdollar)" cum_count_turbine "Cumulative count", ///
	elist(cum_capacity_turbine \addlinespace  cum_count_turbine \addlinespace  pop \addlinespace  white \addlinespace)) ///
	varwidth(27) modelwidth(13) ///
	order(cum_capacity_turbine cum_count_turbine pop white home_median) ///
	mtitles("Model" "Model" "Model" "Model") ///
	stats(N N_clust r2, labels("Observations" "Districts" "\(R^{2}\)") fmt(0 0 2)) ///
	mgroups("Pro-Environment Vote" "Anti-Environment Vote", pattern(1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	width(\hsize)

*******************************************************************************
/*       							Table A17								 */
*******************************************************************************
** Reload the wind votes panel from disk
cd "$rootDir/$dataDir/Final"
use "votes_wind_panel.dta", clear

** Alternative instruments: time index (years since 2003) interacted with
** median and with max wind potential
generate t = year - 2003
generate inter_median = t * median_wp
generate inter_max = t * max_wp

** Fixed-effect identifiers: state-by-year cells and districts within state
egen stateyear_fixed = group(state year)
egen district_fixed = group(state district)

** Compare median and max zonal wind potential as IV
** For each instrument the run order is pro/capacity, pro/count, anti/capacity, anti/count
foreach var in median max {
	foreach vote in pro anti {
		foreach measure in capacity count {
			reghdfe `vote'_env (cum_`measure'_turbine=inter_`var'), ///
				absorb(stateyear_fixed district_fixed) ///
				vce(cluster district_fixed) old
			estimates store `vote'_`measure'_`var'_iv
		}
	}
}


*--------------------- Export LaTeX regression tables -----------------------*
cd "$rootDir/$resultDir/Tables"

** TableA17.tex is written in two passes
* Median-based instrument: creates the file and carries the column headers
esttab pro_capacity_median_iv pro_count_median_iv anti_capacity_median_iv anti_count_median_iv ///
	using TableA17.tex, booktabs replace ///
	refcat(cum_capacity_turbine "\emph{IV: Median wind potential * time}", nolabel) ///
	b(%9.3f) se noconstant noobs nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	varlabels(cum_capacity_turbine "Cumulative capacity (MW)" cum_count_turbine "Cumulative count") ///
	varwidth(27) modelwidth(13) ///
	mtitles("Model" "Model" "Model" "Model") ///
	mgroups("Pro-Environment Vote" "Anti-Environment Vote", pattern(1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	width(\hsize)

* Max-based instrument: appended without titles or model numbers, adds the summary statistics
esttab pro_capacity_max_iv pro_count_max_iv anti_capacity_max_iv anti_count_max_iv ///
	using TableA17.tex, booktabs append ///
	nomtitles se noconstant nonotes legend nonumbers collabels(none) ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	b(%9.3f) ///
	stats(N N_clust r2, labels("Observations" "Districts" "\(R^{2}\)") fmt(0 0 2)) ///
	varlabels(cum_capacity_turbine "Cumulative capacity (MW)" cum_count_turbine "Cumulative count") ///
	varwidth(27) modelwidth(13) ///
	refcat(cum_capacity_turbine "\emph{IV: Max wind potential * time}", nolabel) ///
	width(\hsize)


** Close the log file, then write a .log translation of the .smcl file in the log folder
log close
cd "$rootDir/$logDir"
translate "004_analysis_environment.smcl" "004_analysis_environment.log", replace

